################################################################################
##
## Purpose: This script uses ChatGPT to help evaluate randomly selected pairwise
##          comparisons of Yellen and other speakers' tones. It is included for 
##          reference, but is not part of the replication materials due to the
##          reliance on an API key.
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
##  - Inputs:
##    - ./data/prepped/finalData.RData: Prepped data from 9_DATA_final_build.R
##    - ./data/prepped/chatGPT_polite_BTM_aggression.RData: Saved intermediate file from this script  
##  - Outputs:
##    - ./data/prepped/prepped_chunks_for_GPT.RData
##    - ./data/prepped/chatGPT_polite_BTM_aggression.RData
##    - ./output/chatGPT_polite_BTM_aggression_cleaned.RData
##
################################################################################

# Start from an empty workspace: remove every object in the current
# environment, then trigger a garbage collection.
rm(list = ls())
gc()
# Package dependencies. require() only warns and returns FALSE when a package
# is missing, so an absent package shows up later as a "could not find
# function" error rather than on these lines; library() stops immediately.
require(tidyverse)
require(ggridges)
library(openai)
require(BradleyTerry2)
require(fixest)
require(qvcalc)

# Fix the random seed so the sampling further down is reproducible.
set.seed(123)

# Compute details ----
# Log the run date, basic hardware/process information and the R session so
# that a set of results can be traced back to the environment that made them.
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))

# Sys.info()'s sysname is "Windows", "Linux", "Darwin", ...
os_name <- Sys.info()[['sysname']]

if (os_name == 'Windows') {
  # Query hardware details through the Windows `wmic` command-line tool.
  # The first line of each answer is the column header.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2]
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2]

  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (os_name == 'Linux') {
  # List running `rsession` processes with their CPU / memory share.
  # NOTE(review): the fields are picked by fixed position after splitting on
  # single spaces, so they depend on the column padding `ps` emits -- confirm
  # the positions on the target machine.
  splitted <- strsplit(system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE), " ")
  df <- do.call(rbind, lapply(splitted[-1],
                              function(x) data.frame(
                                cpu = as.numeric(x[2]),
                                mem = as.numeric(x[4]),
                                pid = as.numeric(x[5]),
                                cmd = paste(x[-c(1:5)], collapse = " "))))
  # Print explicitly: auto-printing of a bare `df` is lost under source().
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

# Explicit print for the same reason as above.
print(sessionInfo())


# Restore the prepared analysis objects saved by the data-build step.
# NOTE(review): `utterance_level`, used below, is presumably one of the
# objects stored in this file -- confirm.
load(file = "./data/prepped/finalData.RData")


# Credential for the OpenAI API calls made below: paste the key between the
# quotes before running. As written this sets the variable to an empty string.
Sys.setenv(OPENAI_API_KEY = "")


# Build the chat messages for one pairwise aggression comparison.
#
# Arguments:
#   chunk      Text containing the two conversations to compare.
#   direction  Word inserted before "aggressive" in the question; the calls in
#              this script pass 'more' or 'less'.
#
# Returns a list of two messages, system then user, each a list with the
# elements `role` and `content`.
create_prompt <- function(chunk,direction = 'more') {
  # System message: the definition of aggression the model should apply.
  system_msg <- list(
    role = "system",
    content = "An aggressive communication style is a way of communicating with others than involves assertiveness, dominance, bluntness, verbal attacks, ignoring boundaires, hostility, lack of empathy, manipulation, and defensiveness."
  )

  # User message: the question followed by the conversations themselves.
  question <- stringr::str_c(
    'Please read the following conversations between a chair of the Federal Reserve and a member of Congress. Out of the two examples, which conversation is ',
    direction,
    ' aggressive overall? Within the selected conversation, which speaker is more aggressive?\n\n',
    chunk
  )
  user_msg <- list(role = "user", content = question)

  list(system_msg, user_msg)
}

# Send one chat-completion request to the gpt-3.5-turbo model.
#
# Arguments:
#   prompt       List of chat messages (as built by create_prompt()).
#   temperature  Sampling temperature forwarded to the API.
#   n            Number of completions requested.
#
# Returns whatever openai::create_chat_completion() returns.
submit_openai <- function(prompt, temperature = 0.2, n = 1) {
  completion <- openai::create_chat_completion(
    model = "gpt-3.5-turbo",
    messages = prompt,
    temperature = temperature,
    n = n
  )
  # Pause for one second after every request before handing back the result.
  Sys.sleep(1)
  completion
}


# Can we do this pairwise?
# Build the pool of conversation chunks to sample from: runs of consecutive
# utterances in `utterance_level`, collapsed to one row (one text) per chunk.
toSample <- utterance_level %>%
  arrange(docID,ind) %>%
  select(docID,chamber,date,speaker,opensecretsID,ind,nchars,textclean) %>%
  group_by(docID) %>%
  # Within each document, find the smallest `ind` whose opensecretsID contains
  # "FED" and drop every utterance that comes before it.
  # NOTE(review): for a document with no "FED" row, min() over all-NA values
  # gives Inf (with a warning), so all of its rows are filtered out.
  mutate(firstFED = ifelse(grepl("FED",opensecretsID),ind,NA)) %>%
  mutate(firstFED = min(firstFED,na.rm=T)) %>%
  filter(ind >= firstFED) %>%
  arrange(docID,ind) %>%
  select(docID,chamber,date,ind,speaker,textclean,opensecretsID) %>%
  # Keep only rows whose `ind` is exactly one more than the previous row's in
  # the same document. The first row per document has delta NA and is dropped.
  mutate(delta = ind - lag(ind)) %>%
  filter(delta == 1) %>%
  # Same difference recomputed on the filtered rows, with NA set to 1.
  # delta2 is not referenced again in this pipeline.
  mutate(delta2 = ind - lag(ind)) %>%
  mutate(delta2 = ifelse(is.na(delta2),1,delta2)) %>%
  # Running counter that increases whenever the speaker differs from the
  # speaker two rows earlier (the first two rows of a document are compared
  # with the document's first speaker).
  mutate(chunkIndicator = cumsum(speaker != lag(speaker,2,default = speaker[1]))) %>%
  # Boundary adjustment 1: a row whose counter is one below the next row's
  # takes the next row's value.
  mutate(chunkIndicator = ifelse(chunkIndicator == (lead(chunkIndicator) - 1), lead(chunkIndicator),chunkIndicator)) %>%
  # Boundary adjustment 2: a row sitting exactly one above the previous row
  # and one below the next row takes the previous row's value.
  # NOTE(review): presumably both adjustments absorb stray rows into a
  # neighbouring chunk -- confirm the intended grouping.
  mutate(chunkIndicator = ifelse(chunkIndicator == (lag(chunkIndicator) + 1) & (chunkIndicator == lead(chunkIndicator) - 1),
                                 lag(chunkIndicator),chunkIndicator)) %>%
  # n = number of utterances in each (document, chunk).
  group_by(docID,chunkIndicator) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  mutate(nchars = nchar(textclean)) %>%
  # Keep chunks with at least three utterances.
  filter(n > 2) %>%
  # Prefix each utterance with its speaker label (a trailing period removed).
  mutate(textclean = paste0(gsub('\\.$','',speaker),': ',textclean)) %>%
  # Collapse each chunk into a single newline-separated text.
  group_by(docID,chamber,date,chunkIndicator,n) %>%
  summarise(text = paste(textclean,collapse = '\n')) %>%
  ungroup() %>%
  # Label each chunk with a name based on which date range it falls in.
  mutate(fed = ifelse(date < as.Date('2006-01-01'),'Greenspan',
                      ifelse(date < as.Date('2014-01-01'),'Bernanke',
                             ifelse(date < as.Date('2018-01-01'),'Yellen','Powell')))) %>%
  # nchars now measures the length of the whole chunk text.
  mutate(nchars = nchar(text)) %>%
  # Keep only chunks whose text contains the name assigned in `fed`.
  rowwise() %>%
  filter(grepl(fed,text)) %>%
  ungroup()

# Draw the random conversation pairs that will be sent for comparison.
# For every conversation length (3 to 12 utterances) and every gap between
# consecutive quantiles of text length, draw 50 pairs. A pair is two
# conversations carrying different `fed` labels, placed in random order.
set.seed(123)
chunks <- list()
counter <- 1
for (turn_count in 3:12) {
  # All conversations with this many utterances, and their length cut-offs.
  same_turns <- toSample %>% filter(n == turn_count)
  cutoffs <- quantile(same_turns %>% pull(nchars))

  for (q in seq_along(cutoffs)[-1]) {
    # Conversations strictly between two neighbouring cut-offs.
    bin_floor <- cutoffs[q - 1]
    bin_ceiling <- cutoffs[q]
    bin_pool <- same_turns %>%
      filter(nchars < bin_ceiling,
             nchars > bin_floor)

    for (draw in seq_len(50)) {
      # One random conversation per `fed` label, then two of those at random,
      # then a random ordering of the two.
      one_per_label <- bin_pool %>%
        group_by(fed) %>%
        sample_n(size = 1) %>%
        ungroup()
      pair <- one_per_label %>%
        sample_n(size = 2) %>%
        slice(sample(1:2,2))

      # Store the prompt-ready text next to the rows it was built from.
      pair_text <- paste0('Conversation ',1:2,':\n',
                          pair %>% pull(text),collapse = '\n\n')
      chunks[[counter]] <- list(chunk = paste(pair_text),
                                srcs = pair)
      counter <- counter + 1
    }
  }
}

# Stack the source rows of every sampled pair into a single table for
# inspection. `index` is the pair's position in `chunks` and `rown` is the
# conversation's position (1 or 2) within the pair.
# The per-pair tables are built first and bound once, and seq_along() keeps
# this safe when `chunks` is empty (1:length(chunks) would iterate over 1, 0).
test <- bind_rows(lapply(seq_along(chunks), function(i) {
  chunks[[i]]$srcs %>%
    mutate(index = i,
           rown = row_number())
}))

# Persist the sampled pairs so the API stage can be re-run on the same draws.
save(chunks,file = './data/prepped/prepped_chunks_for_GPT.RData')

# Query the model for every sampled pair.
# Each pair is submitted four times: asking which conversation is 'more' and
# which is 'less' aggressive, each with the two conversations in the stored
# order and in reversed order. One wide row per request is appended to `res`:
# the pair's metadata (suffixes _1 / _2 follow the order shown to the model),
# the model's answer (`explanation`), `direction` and `reversed`.
res <- NULL
for (i in seq_along(chunks)) {
  cat('----------------------------\n',i,'\n----------------------------\n')
  for (d in c('more','less')) {
    for (is_reversed in c(TRUE, FALSE)) {
      if (is_reversed) {
        # Swap the two conversations and rebuild the prompt text from the
        # source rows, in the same format used when `chunks` was created.
        # Rebuilding avoids splitting / substituting on "Conversation 1/2"
        # markers, which would corrupt the prompt if those words occurred
        # inside the conversation text itself.
        srcs <- chunks[[i]]$srcs %>% slice(2,1)
        chnk <- paste0('Conversation ',1:2,':\n',
                       srcs %>% pull(text),collapse = '\n\n')
      } else {
        chnk <- chunks[[i]]$chunk
        srcs <- chunks[[i]]$srcs
      }

      prompts <- create_prompt(chunk = chnk,direction = d)
      # Skip prompts longer than 10,000 characters.
      if (nchar(prompts[[2]]$content) > 10000) { next }

      Sys.sleep(2)
      openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1))

      # Retry until the request succeeds, waiting 5 seconds between attempts.
      # try() prints the error of each failed attempt. There is deliberately
      # no retry cap, so a permanent failure (e.g. a bad key) loops forever.
      while (inherits(openai_completions, 'try-error')) {
        Sys.sleep(5)
        openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1))
      }

      res <- res %>%
        bind_rows(srcs %>%
                    select(-docID,-chunkIndicator) %>%
                    mutate(id = row_number()) %>%
                    pivot_wider(names_from = id,values_from = c('date','n','text','fed','nchars','chamber')) %>%
                    mutate(explanation = openai_completions$choices$message.content,
                           direction = d,
                           reversed = is_reversed))
    }
  }
}

save(res,file = './data/prepped/chatGPT_polite_BTM_aggression.RData')


# Second stage: clear the workspace (including the functions defined above)
# and reload only the raw model answers saved by the first stage.
rm(list = ls(all.names = FALSE))
load(file = "./data/prepped/chatGPT_polite_BTM_aggression.RData")


# Build the chat messages that ask the model to reduce a free-text
# assessment to a single conversation number.
#
# Arguments:
#   chunk  The free-text assessment; it is embedded in the request wrapped in
#          double quotes.
#
# Returns a list of two messages, system then user, each a list with the
# elements `role` and `content`.
create_prompt <- function(chunk) {
  # The line breaks and indentation inside this literal are part of the text
  # sent to the model.
  instruction <- stringr::str_c(
    'The following is a summary assessment of which of two conversations contains more aggressive language.
        Please extract just the number of the conversation that is deemed to be more aggressive.
        If there is not enough information provide, return a -1. Do not return any text in your response.\n"',
    chunk,
    '"'
  )

  list(
    list(role = "system", content = "You are a helpful AI assistant."),
    list(role = "user", content = instruction)
  )
}

# Send one chat-completion request to the gpt-3.5-turbo model.
#
# Arguments:
#   prompt       List of chat messages (as built by create_prompt()).
#   temperature  Sampling temperature forwarded to the API.
#   n            Number of completions requested.
#
# Returns whatever openai::create_chat_completion() returns.
submit_openai <- function(prompt, temperature = 0, n = 1) {
  completion <- openai::create_chat_completion(
    model = "gpt-3.5-turbo",
    messages = prompt,
    temperature = temperature,
    n = n
  )
  # Pause for one second after every request before handing back the result.
  Sys.sleep(1)
  completion
}

# Reduce every free-text answer in `res$explanation` to the short reply the
# model gives to the extraction prompt, stored in `res$cleaned`.
#
# Create the column only when it is missing, as a character column. Resetting
# it unconditionally would discard answers already collected and defeat the
# "only rows that are still NA" selection below when this loop is re-run
# after an interruption.
if (!'cleaned' %in% names(res)) {
  res$cleaned <- NA_character_
}

for (i in which(is.na(res$cleaned))) {
  Sys.sleep(2)
  cat('----------------------------\n',i,'\n----------------------------\n')
  prompts <- create_prompt(res %>% slice(i) %>% pull(explanation))

  openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1))
  # Retry until the request succeeds, waiting 5 seconds between attempts.
  # try() prints the error of each failed attempt. There is deliberately no
  # retry cap, so a permanent failure (e.g. a bad key) loops forever.
  while (inherits(openai_completions, 'try-error')) {
    Sys.sleep(5)
    openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1))
  }
  res$cleaned[i] <- openai_completions$choices$message.content
}

save(res,file = './output/chatGPT_polite_BTM_aggression_cleaned.RData')

# EOF